##Multivariate resemblance##
#libraries sources needed and stuff
library(vegan)
library(permute)
library(simba)
# Loading simba prints a note that 'mad' is masked from 'package:stats'; this is expected (stats::mad() still reaches the base version)
library(cluster)
library(ecodist)
# Loading ecodist prints a note that 'mantel' is masked from 'package:vegan'; call vegan::mantel() explicitly if vegan's version is wanted
# The working directory was changed by hand from the GUI 'Misc' menu; show it
# so the relative CSV paths below can be checked.
getwd()
# The biostats source file was loaded by hand from the GUI 'File' menu.
# data.trans() is used further down and is not defined by this script or by
# the packages attached above (presumably it comes from that biostats file),
# so fail early with a clear message if the manual step was skipped.
if (!exists("data.trans", mode = "function")) {
  stop("data.trans() not found: load the biostats source file first",
       call. = FALSE)
}
# Load the three input tables; in each file the first CSV column supplies the
# row names.
envdata <- read.csv("env.csv", header = TRUE, row.names = 1)
speabu <- read.csv("abund.csv", header = TRUE, row.names = 1)
spetrait <- read.csv("trait.csv", header = TRUE, row.names = 1)
## Calculating coefficients of similarity for binary data (pres/abs) ##
# Transform species abundances into presence/absence (binary transformation)
# using the power method with an exponent of 0.
# REMEMBER: the first column of speabu is text, so drop it (i.e. [, -1]).
# plot = FALSE is spelled out because F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
speocc <- data.trans(speabu[, -1], method = "power", exp = 0, plot = FALSE)
# Jaccard similarity computed with simba's sim()
sp.jac <- sim(speocc, method = "jaccard")
# Look at it
sp.jac
# Now try a few different similarity coefficients on the same data
sp.sim <- sim(speocc, method = "simplematching")
sp.sor <- sim(speocc, method = "soerensen")
# How do two types of coefficients compare to each other? Plot them.
plot(sp.jac, sp.sim,
     xlab = "Jaccard's coefficient", ylab = "Simple Matching coefficient")
# Add a 1:1 line
abline(0, 1, col = "darkgray")
# Plot only the first column of the similarity matrices (object 1 versus
# everyone else, for both coefficients). A dist object stores the lower
# triangle column by column, so its first nrow - 1 entries are the
# comparisons of row 1 with rows 2..nrow. The index is derived from the data
# instead of the previously hard-coded 1:45.
first.col <- seq_len(nrow(speocc) - 1)
# type = "n" sets up the axes without drawing points; text() then draws the
# row name of the compared object at each position.
plot(sp.jac[first.col], sp.sim[first.col],
     xlab = "Jaccard's coefficient", ylab = "Simple matching coefficient",
     type = "n")
# text() takes its strings through 'labels'; the former 'row.names=speocc'
# is not an argument of text(), so the names were never drawn.
text(sp.jac[first.col], sp.sim[first.col], labels = rownames(speocc)[-1])
abline(0, 1, col = "darkgray")
# Bray-Curtis dissimilarity on the abundances (text column dropped)
sp.bray <- vegdist(speabu[, -1], method = "bray")
# Jaccard computed as a distance by vegdist() on the presence/absence data
sp.jacd <- vegdist(speocc, method = "jaccard")
# Plot the Jaccard similarity from sim() against one minus the vegdist()
# Jaccard distance
plot(sp.jac, 1 - sp.jacd)
## Calculating coefficients of similarity for mixed data types ##
# Inspect the structure of the trait table
str(spetrait)
# Gower coefficient via daisy() from the cluster package
sptr.gower <- daisy(spetrait, metric = "gower")
## Calculating (dis)similarity coefficients for continuous data ##
# Environmental dissimilarity by Euclidean distance (most common) and by
# Manhattan distance
env.euc <- vegdist(envdata, method = "euclidean")
env.man <- vegdist(envdata, method = "manhattan")
# Plot the two distances against each other, with a 1:1 reference line
plot(env.euc, env.man, xlab = "euclid", ylab = "manhat")
abline(a = 0, b = 1)
# Convert correlation to a distance: sqrt(2 - 2r).
# Note: cor() on a data frame correlates its columns, so this is a square
# matrix between the variables of envdata, not a 'dist' object.
env.dis <- sqrt(2 - 2 * cor(envdata))
## Exercise ##
# Read in the file of 31 genes with expression values for many developmental
# stages; the first CSV column supplies the row names
oys.exp <- read.csv("31oystergeneexp.csv", header = TRUE, row.names = 1)
# Similarity coefficients: simple matching and Soerensen
oys.sim <- sim(oys.exp, method = "simplematching")
oys.sor <- sim(oys.exp, method = "soerensen")
# Plot one against the other with a 1:1 line.
# Open question from the original notes: the result looks odd -- possibly
# related to how zeros are treated?
plot(oys.sim, oys.sor, xlab = "simple matching", ylab = "sorensen")
abline(a = 0, b = 1, col = "darkgray")
# Bray-Curtis dissimilarity on the expression values
oys.bray <- vegdist(oys.exp, method = "bray")
# Euclidean and Manhattan distances
oys.euc <- vegdist(oys.exp, method = "euclidean")
oys.man <- vegdist(oys.exp, method = "manhattan")
# Plot the two distances against each other, with a 1:1 reference line
plot(oys.euc, oys.man, xlab = "euclid", ylab = "manhat")
abline(a = 0, b = 1, col = "darkgray")
# Transform the data and calculate the coefficient again.
# plot = FALSE is spelled out because F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
oyslog <- data.trans(oys.exp, method = "log", plot = FALSE)
oys.euc.log <- vegdist(oyslog, method = "euclidean")
# Compare Euclidean distances before and after the log transformation, with
# a 1:1 reference line
plot(oys.euc, oys.euc.log, xlab = "euclid", ylab = "euclid transform log")
abline(0, 1, col = "darkgray")